Data Link: https://www.dropbox.com/sh/7fo4efxhpenexqp/AACmuri_l-LDiVDUDJ3hVLqPa?dl=0
This tutorial is based on one from ClickSecurity's data_hacking repo: https://github.com/carriegardner428/data_hacking/blob/master/contagio_traffic_analysis/contagio_traffic_analysis.ipynb
This dataset contains samples of malicious network traffic from the Contagio malware dump. Samples are classified as "APT", "CRIME", or "METASPLOIT", and are further divided into per-sample groupings.
In [30]:
import os
import pandas as pd
from datetime import datetime
In [58]:
# Mapping of fields of the files we want to read in and initial setup of pandas dataframes
logs_to_process = {
'conn.log' : ['ts','uid','id.orig_h','id.orig_p','id.resp_h','id.resp_p','proto','service','duration','orig_bytes','resp_bytes','conn_state','local_orig','missed_bytes','history','orig_pkts','orig_ip_bytes','resp_pkts','resp_ip_bytes','tunnel_parents','threat','sample'],
'dns.log' : ['ts','uid','id.orig_h','id.orig_p','id.resp_h','id.resp_p','proto','trans_id','query','qclass','qclass_name','qtype','qtype_name','rcode','rcode_name','AA','TC','RD','RA','Z','answers','TTLs','rejected','threat','sample'],
'files.log' : ['ts','fuid','tx_hosts','rx_hosts','conn_uids','source','depth','analyzers','mime_type','filename','duration','local_orig','is_orig','seen_bytes','total_bytes','missing_bytes','overflow_bytes','timedout','parent_fuid','md5','sha1','sha256','extracted','threat','sample'],
'ftp.log' : ['ts','uid','id.orig_h','id.orig_p','id.resp_h','id.resp_p','user','password','command','arg','mime_type','file_size','reply_code','reply_msg','data_channel.passive','data_channel.orig_h','data_channel.resp_h','data_channel.resp_p','fuid','threat','sample'],
'http.log' : ['ts','uid','id.orig_h','id.orig_p','id.resp_h','id.resp_p','trans_depth','method','host','uri','referrer','user_agent','request_body_len','response_body_len','status_code','status_msg','info_code','info_msg','filename','tags','username','password','proxied','orig_fuids','orig_mime_types','resp_fuids','resp_mime_types','threat','sample'],
'notice.log' : ['ts','uid','id.orig_h','id.orig_p','id.resp_h','id.resp_p','fuid','file_mime_type','file_desc','proto','note','msg','sub','src','dst','p','n','peer_descr','actions','suppress_for','dropped','remote_location.country_code','remote_location.region','remote_location.city','remote_location.latitude','remote_location.longitude','threat','sample'],
'signatures.log' : ['ts','src_addr','src_port','dst_addr','dst_port','note','sig_id','event_msg','sub_msg','sig_count','host_count','threat','sample'],
'smtp.log' : ['ts','uid','id.orig_h','id.orig_p','id.resp_h','id.resp_p','trans_depth','helo','mailfrom','rcptto','date','from','to','reply_to','msg_id','in_reply_to','subject','x_originating_ip','first_received','second_received','last_reply','path','user_agent','fuids','is_webmail','threat','sample'],
'ssl.log' : ['ts','uid','id.orig_h','id.orig_p','id.resp_h','id.resp_p','version','cipher','server_name','session_id','subject','issuer_subject','not_valid_before','not_valid_after','last_alert','client_subject','client_issuer_subject','cert_hash','validation_status','threat','sample'],
'tunnel.log' : ['ts','uid','id.orig_h','id.orig_p','id.resp_h','id.resp_p','tunnel_type','action','threat','sample'],
'weird.log' : ['ts','uid','id.orig_h','id.orig_p','id.resp_h','id.resp_p','name','addl','notice','peer','threat','sample']
}
conndf = pd.DataFrame(columns=logs_to_process['conn.log'])
dnsdf = pd.DataFrame(columns=logs_to_process['dns.log'])
filesdf = pd.DataFrame(columns=logs_to_process['files.log'])
ftpdf = pd.DataFrame(columns=logs_to_process['ftp.log'])
httpdf = pd.DataFrame(columns=logs_to_process['http.log'])
noticedf = pd.DataFrame(columns=logs_to_process['notice.log'])
sigdf = pd.DataFrame(columns=logs_to_process['signatures.log'])
smtpdf = pd.DataFrame(columns=logs_to_process['smtp.log'])
ssldf = pd.DataFrame(columns=logs_to_process['ssl.log'])
tunneldf = pd.DataFrame(columns=logs_to_process['tunnel.log'])
weirddf = pd.DataFrame(columns=logs_to_process['weird.log'])
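The eleven near-identical constructors above could equivalently be built in one dict comprehension; a minimal sketch (the dfs name is an assumption and is not used elsewhere in this notebook):
In [ ]:
# One empty DataFrame per log type, keyed by log file name
dfs = {name: pd.DataFrame(columns=cols) for name, cols in logs_to_process.items()}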
In [59]:
def clean_timestamp(df):
    # Reference: https://github.com/carriegardner428/data_hacking/blob/master/contagio_traffic_analysis/contagio_traffic_analysis.ipynb
    # Drop the footer record that has no timestamp (Bro ends each log with a '#close' line)
    df = df[~df.ts.astype(str).str.contains("#close")].copy()
    # Convert epoch-second strings to datetimes; assigning the list aligns values positionally,
    # which matters because the filtered frame no longer has a contiguous index
    df['timestamp'] = [datetime.fromtimestamp(float(ts)) for ts in df['ts'].values]
    df.drop('ts', axis=1, inplace=True)
    df.set_index('timestamp', inplace=True)
    return df
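A quick sanity check of clean_timestamp on a made-up two-row frame (values are invented; real Bro logs are tab-separated):
In [ ]:
toy = pd.DataFrame({'ts': ['1325376000.0', '#close'], 'uid': ['CToy1', None]})
clean_timestamp(toy)   # one row survives, indexed by its datetime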
In [60]:
for dirName, subdirList, fileList in os.walk('./data/PCAPS_TRAFFIC_PATTERNS/'):
    for fname in fileList:
        # Directory layout is ./data/PCAPS_TRAFFIC_PATTERNS/<threat>/<sample>/,
        # so splitting on '/' yields five parts with threat and sample last
        tags = dirName.split('/')
        if len(tags) == 5 and fname in logs_to_process:
            try:
                # Bro logs carry an 8-line header; the last two column names
                # ('threat', 'sample') are ours, so don't pass them to read_csv
                tempdf = pd.read_csv(dirName + '/' + fname, sep='\t', skiprows=8,
                                     header=None, names=logs_to_process[fname][:-2])
                tempdf['threat'] = tags[3]
                tempdf['sample'] = tags[4]
                tempdf = clean_timestamp(tempdf)
                if fname == 'conn.log':
                    conndf = pd.concat([conndf, tempdf])
                elif fname == 'dns.log':
                    dnsdf = pd.concat([dnsdf, tempdf])
                elif fname == 'files.log':
                    filesdf = pd.concat([filesdf, tempdf])
                elif fname == 'ftp.log':
                    ftpdf = pd.concat([ftpdf, tempdf])
                elif fname == 'http.log':
                    httpdf = pd.concat([httpdf, tempdf])
                elif fname == 'notice.log':
                    noticedf = pd.concat([noticedf, tempdf])
                elif fname == 'signatures.log':
                    sigdf = pd.concat([sigdf, tempdf])
                elif fname == 'smtp.log':
                    smtpdf = pd.concat([smtpdf, tempdf])
                elif fname == 'ssl.log':
                    ssldf = pd.concat([ssldf, tempdf])
                elif fname == 'tunnel.log':
                    tunneldf = pd.concat([tunneldf, tempdf])
                elif fname == 'weird.log':
                    weirddf = pd.concat([weirddf, tempdf])
            except Exception as e:
                print("Error: {}, on {}/{}".format(str(e), dirName, fname))
In [69]:
conndf.shape
Out[69]:
In [74]:
conndf.sort_index(inplace=True)
conndf.index.year.unique()
Out[74]:
Were computers even around in 1969?
(Yes, LOL ;) ). A Bro timestamp of 0.0 is the Unix epoch (1970-01-01 UTC), which fromtimestamp renders as December 31, 1969 in US time zones, so those rows simply have zeroed-out timestamps. The sample itself spans roughly 2008-2013, so let's subset the frame to records in that interval.
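A one-liner shows where 1969 comes from (the exact rendering depends on your local time zone):
In [ ]:
# Epoch-second 0.0 is 1970-01-01 UTC; west of UTC it prints as late Dec 31, 1969
datetime.fromtimestamp(0)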
In [68]:
conndf = conndf['2008':'2013']
conndf.shape
Out[68]:
Since the index is a DatetimeIndex, you can slice with df['start_time/date':'end_time/date'] to subset the records that fall within a time range.
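A toy illustration of that slicing (made-up dates):
In [ ]:
idx = pd.to_datetime(['2007-06-01', '2009-03-15', '2012-11-30', '2014-02-01'])
toy = pd.DataFrame({'n': range(4)}, index=idx)
toy['2008':'2013']   # partial-string slicing is inclusive: keeps the 2009 and 2012 rows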
In [75]:
conndf.index.year.unique()
Out[75]:
dns.log
In [77]:
dnsdf.sort_index(inplace=True)
dnsdf.index.year.unique()
Out[77]:
In [87]:
dnsdf.shape
Out[87]:
In [88]:
dnsdf = dnsdf['2010':'2013']
dnsdf.shape
Out[88]:
files.log
In [78]:
filesdf.sort_index(inplace=True)
filesdf.index.year.unique()
Out[78]:
In [89]:
filesdf.shape
Out[89]:
In [90]:
filesdf = filesdf['2008':'2013']
filesdf.shape
Out[90]:
ftp.log
In [79]:
ftpdf.sort_index(inplace=True)
ftpdf.index.year.unique()
Out[79]:
In [91]:
ftpdf.shape
Out[91]:
In [92]:
ftpdf = ftpdf['2013']
ftpdf.shape
Out[92]:
http.log
In [80]:
httpdf.sort_index(inplace=True)
httpdf.index.year.unique()
Out[80]:
In [93]:
httpdf.shape
Out[93]:
In [94]:
httpdf = httpdf['2008':'2013']
httpdf.shape
Out[94]:
notice.log
In [81]:
noticedf.sort_index(inplace=True)
noticedf.index.year.unique()
Out[81]:
In [95]:
noticedf.shape
Out[95]:
In [96]:
noticedf = noticedf['2011':'2013']
noticedf.shape
Out[96]:
signatures.log
In [82]:
sigdf.sort_index(inplace=True)
sigdf.index.year.unique()
Out[82]:
In [97]:
sigdf.shape
Out[97]:
smtp.log
In [83]:
smtpdf.sort_index(inplace=True)
smtpdf.index.year.unique()
Out[83]:
In [99]:
smtpdf.shape
Out[99]:
ssl.log
In [84]:
ssldf.sort_index(inplace=True)
ssldf.index.year.unique()
Out[84]:
In [100]:
ssldf.shape
Out[100]:
In [101]:
ssldf = ssldf['2011':'2013']
ssldf.shape
Out[101]:
tunnel.log
In [85]:
tunneldf.sort_index(inplace=True)
tunneldf.index.year.unique()
Out[85]:
In [102]:
tunneldf.shape
Out[102]:
weird.log
In [86]:
weirddf.sort_index(inplace=True)
weirddf.index.year.unique()
Out[86]:
In [103]:
weirddf.shape
Out[103]:
In [105]:
weirddf = weirddf['2011':'2013']
weirddf.shape
Out[105]:
In [34]:
conndf.head()
Out[34]:
In [106]:
conndf.info()
In [131]:
# Get categorical, object-type variables
conndf.select_dtypes(include=['object']).describe()
Out[131]:
In [ ]:
conndf['local_orig'].unique()
In [135]:
conndf.tunnel_parents.value_counts()
Out[135]:
In [136]:
conndf.conn_state.value_counts()
Out[136]:
In [134]:
# Get numerical (non-object) variables
conndf.select_dtypes(exclude=['object']).describe()
Out[134]:
conndf TODOs:
- drop 'ts' column (all NaN; the timestamps now live in the index)
- drop 'local_orig' column, 1 unique value
In [126]:
conndf.drop(['ts', 'local_orig'], axis=1, inplace=True)
conndf.info()
In [15]:
dnsdf.head()
Out[15]:
In [137]:
dnsdf.info()
In [138]:
# Get categorical, object-type variables
dnsdf.select_dtypes(include=['object']).describe()
Out[138]:
In [139]:
dnsdf.rejected.value_counts()
Out[139]:
In [140]:
dnsdf.qclass_name.value_counts()
Out[140]:
In [141]:
dnsdf.TC.value_counts()
Out[141]:
In [142]:
# Get numerical (non-object) variables
dnsdf.select_dtypes(exclude=['object']).describe()
Out[142]:
In [143]:
dnsdf.Z.value_counts()
Out[143]:
dnsdf TODOs:
- drop 'ts' column (all NaN; the timestamps now live in the index)
- drop 'TC' column, 1 unique value
In [145]:
dnsdf.drop(['ts', 'TC'], axis=1, inplace=True)
dnsdf.head()
In [16]:
filesdf.head()
Out[16]:
In [147]:
filesdf.info()
In [148]:
# Get categorical, object-type variables
filesdf.select_dtypes(include=['object']).describe()
Out[148]:
In [149]:
# Get numerical (non-object) variables
filesdf.select_dtypes(exclude=['object']).describe()
Out[149]:
In [150]:
filesdf.overflow_bytes.value_counts()
Out[150]:
filesdf TODOs:
- drop 'ts' column (all NaN; the timestamps now live in the index)
- drop 'extracted' column, 1 unique value
- drop 'local_orig' column, 1 unique value
- drop 'parent_fuid' column, 1 unique value
- drop 'sha256' column, 1 unique value
- drop 'overflow_bytes' column, 1 unique value
In [151]:
filesdf.drop(['ts', 'extracted', 'local_orig', 'parent_fuid', 'sha256', 'overflow_bytes'],
             axis=1, inplace=True)
filesdf.head()
Out[151]:
In [18]:
ftpdf.head(5) # there are only 3 records
Out[18]:
In [152]:
ftpdf.info()
In [153]:
# Get categorical, object-type variables
ftpdf.select_dtypes(include=['object']).describe()
Out[153]:
In [154]:
# Get numerical (non-object) variables
ftpdf.select_dtypes(exclude=['object']).describe()
Out[154]:
ftpdf TODOs:
- drop 'ts' column (all NaN; the timestamps now live in the index)
- drop 'file_size' column, 1 unique value
- drop 'id.orig_h' column, 1 unique value
- drop 'id.resp_h' column, 1 unique value
- drop 'password' column, 1 unique value
- drop 'user' column, 1 unique value
In [155]:
ftpdf.drop(['ts', 'file_size', 'id.orig_h', 'id.resp_h', 'password', 'user'],
           axis=1, inplace=True)
ftpdf.head()
Out[155]:
In [174]:
httpdf.head(5)
Out[174]:
In [156]:
httpdf.info()
In [20]:
noticedf.head()
Out[20]:
In [21]:
sigdf.head() # only 1 record
Out[21]:
In [24]:
smtpdf.head(5)
Out[24]:
In [25]:
ssldf.head()
Out[25]:
In [26]:
tunneldf.head() # 2 records
Out[26]:
In [29]:
weirddf.head(5)
Out[29]:
In [ ]:
### Save DFs
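The save step itself isn't shown in the notebook; a minimal sketch using to_pickle (the file names are assumptions):
In [ ]:
# Pickle preserves dtypes and the DatetimeIndex, unlike a round-trip through CSV
conndf.to_pickle('conndf.pkl')
dnsdf.to_pickle('dnsdf.pkl')
# ...and likewise for the remaining log frames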
In [35]:
## Connections DF
In [37]:
conndf.sort_index(inplace=True)
conndf.info()
In [40]:
conndf.index
Out[40]:
In [39]:
conndf.plot(y='duration')
In [61]:
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
In [157]:
conndf.sort_index().index
Out[157]:
In [158]:
conndf.shape
Out[158]:
In [160]:
# Count records in hourly buckets, then average those hourly counts by day
hourly = conndf.groupby(pd.Grouper(freq='H')).count()
daily = hourly.groupby(pd.Grouper(freq='D')).mean()
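On a DatetimeIndex, pd.Grouper(freq='H') buckets the same way resample does; an equivalent sketch using size(), which counts rows per bucket rather than per column:
In [ ]:
hourly_counts = conndf.resample('H').size()
hourly_counts.head()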
In [176]:
hourly['2012':'2012'].uid.plot(kind='line', figsize=(15,5))
Out[176]:
In [172]:
# Plot hourly record counts per threat class (one line per threat)
ax = conndf.groupby(["threat", pd.Grouper(freq='H')]).size().unstack(0).plot()
In [177]:
### Countplots
In [181]:
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
In [179]:
conndf.select_dtypes(include=['object']).describe()
Out[179]:
In [184]:
fig, ax = plt.subplots(figsize=(15, 10))
sns.countplot(ax=ax, x="conn_state", data=conndf)
plt.xticks(rotation=90)
Out[184]:
In [186]:
fig, ax = plt.subplots(figsize=(5, 5))
sns.countplot(ax=ax, x="proto", data=conndf)
plt.xticks(rotation=90)
Out[186]:
In [187]:
fig, ax = plt.subplots(figsize=(15, 10))
sns.countplot(ax=ax, x="service", data=conndf)
plt.xticks(rotation=90)
Out[187]:
In [188]:
fig, ax = plt.subplots(figsize=(15, 10))
sns.countplot(ax=ax, x="threat", data=conndf)
plt.xticks(rotation=90)
Out[188]:
In [ ]:
#### dns.log
In [ ]:
dnsdf.select_dtypes(include=['object']).describe()